* cgmrif2.ado	JEP		1/24/2015
* Does RIF regressions and bootstraps with Cameron - Gelbach - Miller scheme.
* Modified from cgmrif.ado, to build on myrifreg.ado and to add later steps in rif4.do, to avoid repetition

program define cgmrif3, eclass
version 13.1
syntax varlist(fv)  [if] [in] [pw], hyp(real) boot(integer) cluster(varname) x(varname) i(varname) t(varname) q(integer) rif(name) [kopts(passthru) saving(string) state(string)]

tempfile main bootsave
tempname epshat yhat temp_y epshat_imposed yhat_imposed atx dy T inreg

local hypothesis = `hyp' 
gettoken lhs rhs: varlist
marksample touse

myrifreg `varlist' if `touse' [`weight'`exp'], q(`q') cluster(`cluster') `kopts' rif(`rif')
est store `inreg'

global mainbeta = _b[`x']
global mainse = _se[`x']
global maint = (_b[`x'] - `hyp') / _se[`x'] 
qui predict `epshat' , resid
qui predict `yhat' , xb 

/* also generate "impose the null hypothesis" yhat and residual */
qui gen `temp_y' = `rif' - `x' * `hyp' if `touse'
local controls: subinstr local rhs "`x'" "", all
qui reg `temp_y' `controls' if `touse'
qui predict `epshat_imposed' if `touse' , resid 
qui predict `yhat_imposed' if `touse' , xb 
qui replace `yhat_imposed' = `yhat_imposed' + `x' * `hyp' if `touse'

sort `i' `t'
preserve 
keep if `touse'
qui save `main' , replace 
restore

preserve
qui by `i': keep if _n == 1 & `touse'
qui summ 
global numccode = r(N) 

cap erase `bootsave' 
cap postutil clear
* qui postfile bskeep beta_np t_np t_wild using `bootsave' , replace
qui postfile bskeep coef t_wild using `bootsave' , replace 

glo state = c(rngstate)

if "`state'"!="" set rngstate `state'

forvalues b = 1/`boot' { 
	* if `b'==1 noi _dots 0
	* noi _dots `b' 0
	/* first do wild bootstrap */
	use `main', replace
	qui by `i': gen temp = uniform() 
	qui by `i': gen pos = (temp[1] < .5)
	/*  these two lines (and commendted t-stat below are if you don't want to impose the null hypothesis*/
	/*gen wildresid = `epshat' * (2*pos - 1) 
	gen wildy = `yhat' + wildresid 	*/
	qui gen wildresid = `epshat_imposed' * (2*pos - 1)  
	qui gen wildy = `yhat_imposed' + wildresid  
	qui reg wildy `rhs' , cluster(`cluster')
	loc bscoef= _b[`x']
	local bst_wild = (_b[`x'] - `hypothesis') / _se[`x'] 
	*local bst_wild = (_b[treat] - $mainbeta) / _se[treat] 
	
	/* next do nonparametric bootstrap */
	/* bsample , cluster(`cluster') idcluster(new`cluster') 
	_pctile `lhs', p(`q')
	glo p=r(r1)
	cap drop `atx'
	qui gen `atx'=$p in 1/1
	cap drop `dy'
	qui kdens `lhs', gen(`dy') kernel(gaussian) nograph at(`atx')
	cap drop `T' `rif'
	* RIF
	gen `T'= (`lhs'>$p)
	qui sum `dy' in 1/1, meanonly
	glo fy=r(mean)
	gen `rif'=$p + `T'/$fy 
	
	local newrhs : subinstr local rhs "`cluster'" "new`cluster'", all
	qui reg `rif' `newrhs',  cluster(new`cluster') 
	local bsbeta = _b[`x'] 
	local bst = (_b[`x'] - $mainbeta) / _se[`x'] */

	* post bskeep (`bsbeta') (`bst') (`bst_wild') 
	
	post bskeep (`bscoef') (`bst_wild') 
}  /* end of bootstrap reps */
qui postclose bskeep


qui drop _all 
qui set obs 1 
gen t_np = $maint 
gen t_wild = $maint 
qui append using `bootsave' 

gen initialsort=_n

qui gen n = . 
* foreach stat in t_np t_wild { 
foreach stat in t_wild { 
	qui summ `stat' 
	local bign = r(N) 
	sort `stat' 
	qui replace n = _n 
	qui summ n if abs(`stat' - $maint) < .000001 
	local myp = r(mean) / `bign' 
	global pctile_`stat' = 2 * min(`myp',(1-`myp'))
	_pctile `stat', p(2.5 97.5)
	glo tl`stat'=r(r1)
	glo th`stat'=r(r2)
}
sort initialsort 
save "`saving'", replace
restore

global mainp = normal($maint) 
global pctile_main = 2 * min($mainp,(1-$mainp)) 

local myfmt = "%7.5f" 



* Percentile-t CI at 95%
* global bl_np=$mainbeta+$tlt_np*$mainse
* global bh_np=$mainbeta+$tht_np*$mainse
global bl_wild=$mainbeta+$tlt_wild*$mainse
global bh_wild=$mainbeta+$tht_wild*$mainse

* Display bootstrap table

di "Number BS reps = $bootreps, Null hypothesis = `hypothesis'" 
display "Main beta" _column(13) "main T"	_column(22) "Main %le"	_column(33) "NP BS %le" _column(44) "wild %le" 
di 	%6.3f $mainbeta _column(13) %6.3f $maint  _column(23) 	`myfmt' $pctile_main _column(34) `myfmt' $pctile_t_np _column(44) `myfmt' $pctile_t_wild 
	
* Restore initial regression
est restore `inreg'
* Add statistics of interest

* ereturn local np=$pctile_t_np 
ereturn local tp=$pctile_t_wild
ereturn local bh_wild=$bh_wild
ereturn local bl_wild=$bl_wild
ereturn local state="$state"
* ereturn local bh_np=$bh_np
* ereturn local bl_np=$bl_np


/* 
return scalar mainbeta=$mainbeta
return scalar maint=$maint
return scalar pctile_main=$pctile_main
* return scalar pctile_t_np =$pctile_t_np 
return scalar pctile_t_wild=$pctile_t_wild
* return scalar tlt_np=$tlt_np
* return scalar tht_np=$tht_np
return scalar tlt_wild=$tlt_wild
return scalar tht_wild=$tht_wild
* return scalar bl_np=$bl_np
* return scalar bh_np=$bh_np
return scalar bl_wild=$bl_wild
return scalar bh_wild=$bh_wild
*/
end
